{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|███████████████████████████████████████| 338M/338M [00:45<00:00, 7.79MiB/s]\n"
     ]
    }
   ],
   "source": [
    "import sys\n",
    "sys.path.append(\"../..\")\n",
    "from aips import get_engine\n",
    "from IPython.display import display, HTML\n",
    "from pyspark.sql import SparkSession\n",
    "import ipywidgets as widgets\n",
    "from PIL import Image\n",
    "import pickle\n",
    "import requests\n",
    "import numpy\n",
    "import torch\n",
    "import clip\n",
    "from io import BytesIO\n",
    "\n",
    "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
    "model, preprocess = clip.load(\"ViT-B/32\", device=device)\n",
    "\n",
    "engine = get_engine()\n",
    "spark = SparkSession.builder.appName(\"AIPS\").getOrCreate()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Cloning into 'tmdb'...\n",
      "remote: Enumerating objects: 7, done.\u001b[K\n",
      "remote: Counting objects: 100% (7/7), done.\u001b[K\n",
      "remote: Compressing objects: 100% (7/7), done.\u001b[K\n",
      "remote: Total 7 (delta 0), reused 6 (delta 0), pack-reused 0 (from 0)\u001b[K\n",
      "Receiving objects: 100% (7/7), 103.98 MiB | 10.46 MiB/s, done.\n",
      "Already up to date.\n",
      "movies_with_image_embeddings.pickle\n"
     ]
    }
   ],
   "source": [
    "![ ! -d 'tmdb' ] && git clone --depth 1 https://github.com/ai-powered-search/tmdb.git\n",
    "! cd tmdb && git pull\n",
    "! cd tmdb && mkdir -p '../data/tmdb/' && tar -xvf movies_with_image_embeddings.tgz -C '../data/tmdb/'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Wiping \"tmdb\" collection\n",
      "Creating \"tmdb\" collection\n",
      "Status: Success\n",
      "Adding LTR QParser for tmdb collection\n",
      "Adding LTR Doc Transformer for tmdb collection\n",
      "GET https://github.com/ai-powered-search/tmdb/raw/main/judgments.tgz\n",
      "GET https://github.com/ai-powered-search/tmdb/raw/main/movies.tgz\n",
      "Successfully written 65616 documents\n"
     ]
    }
   ],
   "source": [
    "#%run chapters/ch10/1.setup-the-movie-db.ipynb"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Listing 15.14"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Wiping \"tmdb_with_embeddings\" collection\n",
      "Creating \"tmdb_with_embeddings\" collection\n",
      "Status: Success\n",
      "Successfully written 7549 documents\n"
     ]
    }
   ],
   "source": [
    "from aips.spark import create_view_from_collection\n",
    "from aips.spark.dataframe import from_sql\n",
    "from pyspark.sql.window import Window\n",
    "from pyspark.sql.functions import col, split, regexp_replace\n",
    "\n",
    "def normalize_embedding(embedding):\n",
    "    return numpy.divide(embedding,\n",
    "      numpy.linalg.norm(embedding,axis=0)).tolist()\n",
    "\n",
    "def read(cache_name):\n",
    "    cache_file_name = f\"data/tmdb/{cache_name}.pickle\"\n",
    "    with open(cache_file_name, \"rb\") as fd:\n",
    "        return pickle.load(fd)\n",
    "\n",
    "def tmdb_with_embeddings_dataframe():\n",
    "    movies = read(\"movies_with_image_embeddings\")\n",
    "    embeddings = movies[\"image_embeddings\"]\n",
    "    normalized_embeddings = [normalize_embedding(e) for e in embeddings]\n",
    "    movies_dataframe = spark.createDataFrame(\n",
    "        zip(movies[\"movie_ids\"], movies[\"titles\"], \n",
    "            movies[\"image_ids\"], normalized_embeddings),\n",
    "        schema=[\"movie_id\", \"title\", \"image_id\", \"image_embedding\"])\n",
    "    return movies_dataframe\n",
    "\n",
    "embeddings_dataframe = tmdb_with_embeddings_dataframe()\n",
    "embeddings_collection = engine.create_collection(\"tmdb_with_embeddings\")\n",
    "embeddings_collection.write(embeddings_dataframe)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Setting up the lexical embedding index\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Wiping \"tmdb_lexical_plus_embeddings\" collection\n",
      "Creating \"tmdb_lexical_plus_embeddings\" collection\n",
      "Status: Success\n",
      "Successfully written 908 documents\n"
     ]
    }
   ],
   "source": [
    "def tmdb_lexical_embeddings_dataframe():\n",
    "    lexical_tmdb_collection = engine.get_collection(\"tmdb\")\n",
    "    create_view_from_collection(lexical_tmdb_collection, \"tmdb_lexical\")\n",
    "    embeddings_tmdb_collection = engine.get_collection(\"tmdb_with_embeddings\")\n",
    "    create_view_from_collection(embeddings_tmdb_collection, \"tmdb_with_embeddings\")\n",
    "    \n",
    "    joined_collection_sql = f\"\"\"    \n",
    "    SELECT lexical.id id, embeddings.movie_id movie_id, lexical.title title, lexical.overview overview, embeddings.image_embedding, embeddings.image_id\n",
    "    FROM tmdb_with_embeddings embeddings\n",
    "    INNER JOIN (SELECT DISTINCT image_id from (SELECT movie_id, MIN(image_id) image_id from tmdb_with_embeddings GROUP BY movie_id ORDER BY image_id ASC)) distinct_images on embeddings.image_id = distinct_images.image_id\n",
    "    INNER JOIN tmdb_lexical lexical ON lexical.id = embeddings.movie_id\n",
    "    ORDER by lexical.id asc\n",
    "    \"\"\"\n",
    "    #collection = engine.create_collection(\"tmdb_lexical_plus_embeddings\")\n",
    "    joined_dataframe = from_sql(joined_collection_sql) \n",
    "    joined_dataframe = joined_dataframe.withColumn(\"image_embedding\", split(regexp_replace(col(\"image_embedding\"),\"\\\"\",\"\"), \",\"))\n",
    "    return joined_dataframe\n",
    "\n",
    "lexical_embeddings = tmdb_lexical_embeddings_dataframe()\n",
    "lexical_collection = engine.create_collection(\"tmdb_lexical_plus_embeddings\")\n",
    "lexical_collection.write(lexical_embeddings)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Listing 15.15"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_image(full_path, log=False):   \n",
    "    try:\n",
    "        if full_path.startswith(\"http\"):\n",
    "            response = requests.get(full_path)\n",
    "            image = Image.open(BytesIO(response.content))\n",
    "        else:\n",
    "            image = Image.open(full_path)\n",
    "        if log: print(\"File Found\")\n",
    "        return image\n",
    "    except:\n",
    "        if log: print(f\"No Image Available {full_path}\")\n",
    "        return []\n",
    "\n",
    "def movie_search(query_embedding, limit=8):\n",
    "    collection = engine.get_collection(\"tmdb_with_embeddings\")\n",
    "    request = {\n",
    "        \"query\": query_embedding,\n",
    "        \"query_fields\": [\"image_embedding\"],\n",
    "        \"return_fields\": [\"movie_id\", \"title\",\n",
    "                          \"image_id\", \"score\"],\n",
    "        \"limit\": limit,\n",
    "        \"quantization_size\": \"FLOAT32\"}\n",
    "    response = collection.search(**request)\n",
    "    return response\n",
    "    \n",
    "def encode_text(text, normalize=True):\n",
    "    text = clip.tokenize([text]).to(device)    \n",
    "    text_features = model.encode_text(text)\n",
    "    embedding = text_features.tolist()[0] \n",
    "    if normalize:\n",
    "        embedding = normalize_embedding(embedding)\n",
    "    return embedding\n",
    "    \n",
    "def encode_image(image_file, normalize=True):\n",
    "    image = load_image(image_file)\n",
    "    inputs = preprocess(image).unsqueeze(0).to(device)\n",
    "    embedding = model.encode_image(inputs).tolist()[0]\n",
    "    if normalize:\n",
    "        embedding = normalize_embedding(embedding)\n",
    "    return embedding\n",
    "\n",
    "def encode_text_and_image(text_query, image_file):    \n",
    "    text_embedding = encode_text(text_query, False)\n",
    "    image_embedding = encode_image(image_file, False)  \n",
    "    return numpy.average((normalize_embedding(\n",
    "        [text_embedding, image_embedding])), axis=0).tolist()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Listing 15.16"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_html(search_results, show_fields=True, display_header=None):\n",
    "    css = \"\"\"\n",
    "      <style type=\"text/css\">\n",
    "        .results { \n",
    "          margin-top: 15px; \n",
    "          display: flex; \n",
    "          flex-wrap: wrap; \n",
    "          justify-content: space-evenly; }\n",
    "        .field { font-size: 24px; position: relative; float: left; }\n",
    "        .title { font-size:32px; font-weight:bold; max-width:450px; word-wrap:break-all; line-height:32px; display: table-cell; vertical-align: bottom;}\n",
    "        .results .result { height: 250px; margin-bottom: 5px; }\n",
    "        .fields {height: 100%; }\n",
    "      </style>\"\"\"\n",
    "    \n",
    "    header = \"\"\n",
    "    if display_header: \n",
    "        header = f\"<div class='field' style='width:100%; font-size:32px; margin-bottom:20px'>{display_header}</div>\"\n",
    "    \n",
    "    results_html = \"\"\n",
    "    for movie in search_results[\"docs\"]:\n",
    "        image_file = f\"http://image.tmdb.org/t/p/w780/{movie['image_id']}.jpg\"\n",
    "        movie_link = f\"https://www.themoviedb.org/movie/{movie['movie_id']}\"\n",
    "        img_html = f\"<img title='{movie['title']}' class='result' src='{image_file}'>\"\n",
    "        if show_fields: results_html += f\"<div class='fields'><div class='title' style='height:65px;'>{movie['title']}</div><div class='field'>( score: {movie['score']} )</div>\"\n",
    "        results_html += f\"<div style='clear:left'><a class='title' href='{movie_link}' target='_blank'>{img_html}</a></div>\"\n",
    "        if show_fields: results_html += \"</div>\"\n",
    "    return f\"{css}{header}<div class='results' style='clear:left'>{results_html}</div>\"\n",
    "   \n",
    "def display_results(search_results, show_fields=True, display_header=None):    \n",
    "    output = widgets.Output()\n",
    "    with output:\n",
    "        display(HTML(get_html(search_results, show_fields, display_header))) \n",
    "    display(widgets.HBox(layout=widgets.Layout(justify_content=\"center\")), output)   "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "def search_and_display(text_query=\"\", image_query=None):\n",
    "    if image_query:\n",
    "        if text_query:\n",
    "            query_embedding = encode_text_and_image(text_query, image_query)\n",
    "        else:\n",
    "            query_embedding = encode_image(image_query)\n",
    "    else:\n",
    "        query_embedding = encode_text(text_query)\n",
    "    display_results(movie_search(query_embedding), show_fields=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Figure 15.5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "69e71a557f6442a680c3eee8f16a3113",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(layout=Layout(justify_content='center'))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "c59f4e8f8af64ae6a53f1201bf532091",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Output()"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "search_and_display(text_query=\"singing in the rain\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "9b230abbf36745b8b52c4eca0b38e175",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(layout=Layout(justify_content='center'))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b61d901765a94b91af1ed00eb996d9ae",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Output()"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "search_and_display(text_query=\"superhero flying\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Figure 15.6"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d20d4e32d72b48f8baad718079573eaf",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(layout=Layout(justify_content='center'))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "9e955ab7f0ca42df9e176096cc4e0b5b",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Output()"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "search_and_display(text_query=\"superheroes flying\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Figure 15.7"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "78ac36ab5b2b4008b769c12ed02eb899",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(layout=Layout(justify_content='center'))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "7b3ee8e84ab44ee7a400dd56ff449733",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Output()"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "search_and_display(image_query=\"chapters/ch15/delorean-query.jpg\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Figure 15.8"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "253b863576614247b5b44e4258993984",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(layout=Layout(justify_content='center'))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "dd38ebe05c6b46a99c7587a644c6cb50",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Output()"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "search_and_display(text_query=\"superhero\", image_query=\"chapters/ch15/delorean-query.jpg\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Listing 15.17"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Listing 15.17"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# %load -s reciprocal_rank_fusion ../../engines/Collection.py\n",
    "def reciprocal_rank_fusion(search_results, k = None):\n",
    "    if k is None: k = 60\n",
    "    scores = {}\n",
    "    for ranked_docs in search_results:\n",
    "        for rank, doc in enumerate(ranked_docs, 1):\n",
    "            scores[doc[\"id\"]] = scores.get(doc[\"id\"], 0)  + (1.0 / (k + rank))\n",
    "    sorted_scores = dict(sorted(scores.items(), key=lambda item: item[1], reverse=True))\n",
    "    return sorted_scores\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "def get_display_header(lexical_request=None, vector_request=None):\n",
    "    if lexical_request and vector_request:\n",
    "        return \"Hybrid Results:<br/>\" \\\n",
    "               + f\"  --Lexical Query: {lexical_request['query']}<br/>\" \\\n",
    "               + f\"  --Vector Query: [{vector_request['query'][0]}, {vector_request['query'][1]}, ... ]<br/>\"\n",
    "    elif lexical_request:\n",
    "        return f\"Lexical Query: {lexical_request['query']}<br/>\"\n",
    "    elif vector_request:\n",
    "        return f\"Vector Query: [{vector_request['query'][0]}, {vector_request['query'][1]}, ... ]<br/>\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "collection = engine.get_collection(\"tmdb_lexical_plus_embeddings\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Listing 15.18"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "3b88d6376b3247a0afc59a901f2e6683",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(layout=Layout(justify_content='center'))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "3b1288e1b5f64ca2a47a83ac82a5181e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Output()"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "dd266be470f542ee91f6d5c0499d16b5",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(layout=Layout(justify_content='center'))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b0547c1adce14682868134ab8e6284c4",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Output()"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "over_request_limit = 15\n",
    "base_query = {\"return_fields\": [\"id\", \"title\", \"id\", \"image_id\",\n",
    "                                \"movie_id\", \"score\", \"image_embedding\"],\n",
    "              \"limit\": over_request_limit,\n",
    "              \"order_by\": [(\"score\", \"desc\"), (\"title\", \"asc\")]}\n",
    "\n",
    "def lexical_search_request(query_text):\n",
    "    return {\"query\": query_text, \n",
    "            \"query_fields\": [\"title\", \"overview\"],   \n",
    "            \"default_operator\": \"OR\",    \n",
    "            **base_query}\n",
    "\n",
    "def vector_search_request(query_embedding):\n",
    "    return { \"query\": query_embedding, \n",
    "             \"query_fields\": [\"image_embedding\"],\n",
    "             \"quantization_size\": \"FLOAT32\",\n",
    "            **base_query}\n",
    "\n",
    "def display_lexical_search_results(query_text):\n",
    "    collection = engine.get_collection(\"tmdb_lexical_plus_embeddings\")\n",
    "    lexical_request = lexical_search_request(query_text)\n",
    "    lexical_search_results = collection.search(**lexical_request)\n",
    "    \n",
    "    display_results(lexical_search_results, display_header= \\\n",
    "                    get_display_header(lexical_request=lexical_request))\n",
    "    \n",
    "def display_vector_search_results(query_text):\n",
    "    collection = engine.get_collection(\"tmdb_lexical_plus_embeddings\")\n",
    "    query_embedding = encode_text(query_text)\n",
    "    vector_request = vector_search_request(query_embedding)\n",
    "    vector_search_results = collection.search(**vector_request)\n",
    "\n",
    "    display_results(vector_search_results, display_header= \\\n",
    "                    get_display_header(vector_request=vector_request))\n",
    "\n",
    "query = '\"' + \"singin' in the rain\" + '\"'\n",
    "display_lexical_search_results(query)\n",
    "display_vector_search_results(query)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "tags": []
   },
   "source": [
    "## Listing 15.19"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "def display_hybrid_search_results(text_query, limit=10):\n",
    "    lexical_request = lexical_search_request(text_query)\n",
    "    vector_request = vector_search_request(encode_text(text_query))    \n",
    "    hybrid_search_results = collection.hybrid_search(\n",
    "          [lexical_request, vector_request], limit=10,\n",
    "          algorithm=\"rrf\", algorithm_params={\"k\": 60})\n",
    "    display_results(hybrid_search_results,\n",
    "                    display_header=get_display_header(lexical_request, vector_request))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Listing 15.20"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "3a3858052ed04ffb9e06010f7684f459",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(layout=Layout(justify_content='center'))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "1d9f63fcd37d40d9a684cce925ec7ab0",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Output()"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "94bd32c489e549468f391c8f84cc07b8",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(layout=Layout(justify_content='center'))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "46553a1ca4654c59b880bf622aa2bb28",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Output()"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "query = \"the hobbit\"\n",
    "display_lexical_search_results(query)\n",
    "display_vector_search_results(query)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "07b2453a633e4569a13808db597d6da3",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(layout=Layout(justify_content='center'))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b9621959ba344943b823b92e173ce342",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Output()"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "display_hybrid_search_results(query)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Listing 15.21"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [],
   "source": [
    "def lexical_vector_rerank(text_query, limit=10):\n",
    "    lexical_request = lexical_search_request(text_query)\n",
    "    vector_request = vector_search_request(encode_text(text_query))    \n",
    "    hybrid_search_results = collection.hybrid_search(\n",
    "        [lexical_request, vector_request],\n",
    "        algorithm=\"lexical_vector_rerank\", limit=limit)\n",
    "    header = get_display_header(lexical_request, vector_request)\n",
    "    display_results(hybrid_search_results, display_header=header)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "de0d565ba55546f2a9620c9b074f2ec9",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(layout=Layout(justify_content='center'))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "3494464afe1b4f039fa1d9dc19314fc1",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Output()"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "lexical_vector_rerank(\"the hobbit\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
